{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "53af08cf-0149-4369-93d6-fba2203ec6cc",
   "metadata": {},
   "source": [
    "# cloud-object-store-housekeeping"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8e13f0c-0f46-4ffa-982e-620cb2c5ad82",
   "metadata": {},
   "source": [
    "Housekeeping (list, walk, delete) on any S3-compliant Cloud Object Storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05a6d935-38c5-49df-9df2-fbc8c31c4634",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Install the pinned dependencies in one resolver run. %pip (unlike !pip) always\n",
    "# installs into the environment of the running kernel.\n",
    "%pip install aiobotocore==2.0.1 botocore==1.22.8 s3fs==2021.11.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "85859764-8fd2-4394-88d6-b3c54194b868",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import os\n",
    "import sys\n",
    "import re\n",
    "import s3fs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8069e66-fad7-4be9-bc6f-b9eaa119b3bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connection settings are read from environment variables; each is None when unset.\n",
    "access_key_id = os.getenv('access_key_id')\n",
    "secret_access_key = os.getenv('secret_access_key')\n",
    "endpoint = os.getenv('endpoint')\n",
    "bucket_name = os.getenv('bucket_name')\n",
    "\n",
    "# default: ls, operation to run: rm (delete), ls (list), walk (walk the tree)\n",
    "operation = os.getenv('operation', 'ls')\n",
    "\n",
    "# default: sysout, for the \"ls\" and \"walk\" operations: return the list on sysout or as a file (provide a file name instead of sysout)\n",
    "return_mode = os.getenv('return_mode', 'sysout')\n",
    "\n",
    "# default: empty string, file/folder to operate on\n",
    "path = os.getenv('path', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad8bf902-7582-445b-aea1-a9ce869532ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_parameters(argv):\n",
    "    \"\"\"Extract name=value pairs from a list of command-line arguments.\n",
    "\n",
    "    An argument is accepted only if it has the form <identifier>=<value>.\n",
    "    The value is everything after the first '=' and is kept as a plain string.\n",
    "    Every other argument (flags such as --x=1, bare words, empty names) is ignored.\n",
    "\n",
    "    Args:\n",
    "        argv: iterable of argument strings, e.g. sys.argv.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping each parameter name to its string value; if a name is\n",
    "        given more than once, the last occurrence wins.\n",
    "    \"\"\"\n",
    "    pattern = re.compile(r'([A-Za-z_][A-Za-z0-9_]*)=(.*)', re.DOTALL)\n",
    "    matches = (pattern.fullmatch(arg) for arg in argv)\n",
    "    return {m.group(1): m.group(2) for m in matches if m}\n",
    "\n",
    "\n",
    "# Command-line parameters override the values read from the environment.\n",
    "# They are assigned directly instead of being passed through exec(), so the\n",
    "# content of an argument can never be executed as code.\n",
    "for name, value in parse_parameters(sys.argv).items():\n",
    "    # Log the name only: values may contain credentials (e.g. secret_access_key).\n",
    "    logging.warning('Parameter: ' + name)\n",
    "    globals()[name] = value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "704117f6-702d-43ee-9092-201c46b31e15",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Authenticated (non-anonymous) S3 filesystem handle; the endpoint URL is\n",
    "# forwarded to the underlying client through client_kwargs.\n",
    "s3 = s3fs.S3FileSystem(\n",
    "    anon=False, key=access_key_id, secret=secret_access_key,\n",
    "    client_kwargs=dict(endpoint_url=endpoint),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60436d8a-f461-4723-abeb-cc22b555c782",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location the selected operation acts on: <bucket>/<path>.\n",
    "target = bucket_name + '/' + path\n",
    "\n",
    "# NOTE(review): only return_mode == 'sysout' produces output; with any other\n",
    "# value the result of 'ls' / 'walk' is computed and then silently discarded.\n",
    "if operation == 'ls':\n",
    "    # List the target, honouring `path` in the same way as 'walk' and 'rm'.\n",
    "    result_list = s3.ls(target)\n",
    "    if return_mode == 'sysout':\n",
    "        print(result_list)\n",
    "elif operation == 'walk':\n",
    "    # Walk the tree below the target, at most 100 levels deep.\n",
    "    result_list = s3.walk(target, maxdepth=100)\n",
    "    if return_mode == 'sysout':\n",
    "        for item in result_list:\n",
    "            print(item)\n",
    "elif operation == 'rm':\n",
    "    # Destructive: recursively delete the target (at most 100 levels deep).\n",
    "    s3.rm(target, recursive=True, maxdepth=100)\n",
    "else:\n",
    "    # Raising a plain string is itself a TypeError in Python 3, which hides the\n",
    "    # message; raise a real exception type instead.\n",
    "    raise ValueError('operation unknown: {}'.format(operation))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
